/*

This code combines MECS, ASM, CMF, and ASM fuel Trailers

*/


use "/projects/data/dataSTATA/combined/combined_iv.dta", clear
merge m:1 bestnaics year using "/projects/data/dataSTATA/economic/mecs_merged_collapsed.dta", assert(1 2 3) keep(1 3) nogen


* interpolate missing years emissions coefficients
foreach var in co2_cf_raw  btu_cf_raw {
	bys bestnaics: ipolate `var' year, gen(`var'_i) epolate
	replace `var'   = 0   if `var' < 0
	replace `var'_i = 0 if `var'_i < 0
	la var `var'_i "`var', interpolated for missing years"
}


*>----------------- calculate btu/co2 from electricity in ASM/CMF -------------<
** Units conversions performed in secondary script for disclosure purposes

do projects/programs/codeSTATA/A_cleaning/4_combine_unitsconversionsA.do

*>----------------- using imputed fuel coefs ----------------------------------<

* co2 and btu from coefs
gen co2_raw_m   = co2_cf_raw * cf * 1000 
gen co2_raw_m_i = co2_cf_raw_i * cf * 1000
gen btu_raw_m   = btu_cf_raw * cf * 1000
gen btu_raw_m_i = btu_cf_raw_i * cf * 1000

* energy intensity
gen btu_tot_m   = elec_btu_m + btu_raw_m 
gen btu_tot_m_i = elec_btu_m + btu_raw_m_i
gen co2_tot_m   = elec_co2_m + co2_raw_m
gen co2_tot_m_i = elec_co2_m + co2_raw_m_i
 
* prices
gen co2_price   = cfe / (elec_co2_m + co2_raw_m) * 1000 // cfe is in 1000
gen co2_price_i = cfe / (elec_co2_m + co2_raw_m_i) * 1000 
gen btu_price   = cfe / (elec_btu_m + btu_raw_m) * 1000 
gen btu_price_i = cfe / (elec_btu_m + btu_raw_m_i) * 1000 

la var co2_price   "Fuel Cost per kg CO2 (USD 2011)"
la var co2_price_i "Fuel Cost per kg CO2 (USD 2011), incl. interpolated years" 
la var btu_price   "Fuel Cost per million BTU (USD, 2011)"
la var btu_price_i "Fuel Cost per million BTU (USD, 2011), incl. interpolated years)"

gen btu_int   = btu_tot_m / (tvs * 1000)
gen btu_int_i = btu_tot_m_i / (tvs * 1000)
gen co2_int   = co2_tot_m / (tvs * 1000)
gen co2_int_i = co2_tot_m_i / (tvs * 1000) 

la var co2_int   "CO2 Energy Intensity (kg CO2 per USD 2011)"
la var co2_int_i "CO2 Energy Intensity (kg CO2 per USD 2011), incl. interpolated years" 
la var btu_int   "BTU Energy Intensity (million BTU per USD revenue, 2011)"
la var btu_int_i "BTU Energy Intensity (million BTU per USD revenue, 2011), incl. interpolated years)"

foreach var in co2_price co2_price_i co2_int co2_int_i btu_int btu_int_i btu_price btu_price_i {
	gen l`var' = log(`var')
	la var l`var' "log of `var'"

}

* calculate establishment-specific cost shares
gen elec_co2_share = elec_co2_m / (elec_co2_m + co2_raw_m)
gen elec_co2_share_i = elec_co2_m / (elec_co2_m + co2_raw_m_i)
gen elec_btu_share = elec_btu_m / (elec_btu_m + btu_raw_m)
gen elec_btu_share_i = elec_btu_m / (elec_btu_m + btu_raw_m_i)
	
* calculate establishment-specific cost shares and fuel coefs in initial years
** initial energy prices if you are in the cmf or asm in the first year you are in the lbd -- then we know exactly what your price is
preserve
keep if firstyear == year
foreach var in elec_co2_share elec_btu_share elec_co2_share_i elec_btu_share_i co2_cf_raw  btu_cf_raw co2_cf_raw_i btu_cf_raw_i {
	ren `var' `var'_init
}

* lbdnum uniquely identifies plants over time
keep firstyear elec_co2_share_init elec_btu_share_init elec_co2_share_i_init elec_btu_share_i_init co2_cf_raw_init  btu_cf_raw_init co2_cf_raw_i_init btu_cf_raw_i_init lbdnum
tempfile init1
sa `init1', replace
restore


** initial energy price average by firstyear state bestnaics
preserve
keep if year == firstyear
collapse (mean) elec_co2_share elec_btu_share elec_co2_share_i elec_btu_share_i co2_cf_raw  btu_cf_raw co2_cf_raw_i btu_cf_raw_i, by(year bestnaics fipsst)
foreach var in elec_co2_share elec_btu_share elec_co2_share_i elec_btu_share_i co2_cf_raw  btu_cf_raw co2_cf_raw_i btu_cf_raw_i{
	ren `var' `var'_init_avg
}
ren year firstyear
tempfile init2
sa `init2', replace
restore

** initial energy price average by firstyear state 
preserve
keep if year == firstyear
* collapse by year state
collapse (mean) elec_co2_share elec_btu_share elec_co2_share_i elec_btu_share_i co2_cf_raw  btu_cf_raw co2_cf_raw_i btu_cf_raw_i, by(year fipsst)
foreach var in elec_co2_share elec_btu_share elec_co2_share_i elec_btu_share_i co2_cf_raw  btu_cf_raw co2_cf_raw_i btu_cf_raw_i{
	ren `var' `var'_init_avg2
}
ren year firstyear
tempfile init3
sa `init3', replace
restore

merge m:1 firstyear lbdnum               using `init1', assert(1 2 3) keep(1 3) nogen
merge m:1 firstyear bestnaics fipsst     using `init2', assert(1 2 3) keep(1 3) nogen
merge m:1 firstyear fipsst               using `init3', assert(1 2 3) keep(1 3) nogen

foreach var in elec_co2_share_init elec_co2_share_i_init elec_btu_share_init elec_btu_share_i_init co2_cf_raw_init  btu_cf_raw_init co2_cf_raw_i_init btu_cf_raw_i_init {
	replace  `var'  =  `var'_avg    if mi(`var')
	replace  `var'  =  `var'_avg2   if mi(`var')
}

* convert electricity price to price per co2 instead of kwh --> $ / kwh to $ / kg CO2

do projects/programs/codeSTATA/A_cleaning/4_combine_unitsconversionsB.do

** Continue one
gen btu_price_init = ee_btu_price_init * elec_btu_share_i_init + (1 / btu_cf_raw_i_init) * (1-elec_btu_share_i_init)
replace btu_price_init = ee_btu_price_init if mi(btu_price_init)
gen lbtu_price_init = log(btu_price_init)

* calculate initial co2 price
gen co2_price_init = ee_co2_price_init * elec_co2_share_i_init + (1 / co2_cf_raw_i_init) * (1-elec_co2_share_i_init)
replace co2_price_init = ee_co2_price_init if mi(co2_price_init)
gen lco2_price_init = log(co2_price_init)

* drop intermediate vars
drop *_share_init *_share_i_init *_avg *_avg2 ee_co2_price_init *raw_i_init *raw_i *raw *raw_init *_m *m_i ee_btu_price_init elec_btu_share_i elec_btu_share elec_co2_share_i elec_co2_share bestsic*
la var co2_price_init   "Entry Year Fuel Cost per kg CO2 (USD 2011), incl. interpolated yrs"
la var lco2_price_init  "log of co2_price_init" 
la var btu_price_init   "Entry Year Fuel Cost per million BTU (USD, 2011), incl. interpolated yrs"
la var lbtu_price_init  "log of btu_price_init"

* drop unused instruments that are weaker
drop instr_B instr_V instr_B_init instr_V_init instr_V_cl instr_V_cl_init instr_V_ng instr_V_ng_init instr_V_pa instr_V_pa_init


order year firstyear lbdnum firmid bestnaics zip county fipsst postalst state wt_cmf flagb

sa "/projects/data/dataSTATA/combined/combined_iv_energy.dta", replace


* combine with ASM coefs

use "/projects/data/dataSTATA/economic/asmFuels_merged_collapsed.dta", clear
keep bestnaics year co2_cf_raw btu_cf_raw
tempfile coefs
sa `coefs', replace


use "/projects/data/dataSTATA/combined/combined_iv.dta", clear
merge m:1 bestnaics year using `coefs', assert(1 2 3) keep(1 3) nogen
keep if year < 1982

* interpolate missing years emissions coefficients
foreach var in co2_cf_raw  btu_cf_raw {
	bys bestnaics: ipolate `var' year, gen(`var'_i) epolate
	replace `var'   = 0   if `var' < 0
	replace `var'_i = 0 if `var'_i < 0
	la var `var'_i "`var', interpolated for missing years"
}


* convert kwh to million btu. purchased electricity is measured in 1000 kwh
* Units conversions performed in secondary script for disclosure purposes
do projects/programs/codeSTATA/A_cleaning/4_combine_unitsconversionsC.do

*>----------------- using imputed fuel coefs ----------------------------------<

* co2 and btu from coefs
gen co2_raw_m   = co2_cf_raw * cf * 1000 // cf is in 1000s
gen co2_raw_m_i = co2_cf_raw_i * cf * 1000
gen btu_raw_m   = btu_cf_raw * cf * 1000
gen btu_raw_m_i = btu_cf_raw_i * cf * 1000

* energy intensity
gen btu_tot_m   = elec_btu_m + btu_raw_m // multiply by 1000 since _cf is in $ and cf is $1000
gen btu_tot_m_i = elec_btu_m + btu_raw_m_i
gen co2_tot_m   = elec_co2_m + co2_raw_m
gen co2_tot_m_i = elec_co2_m + co2_raw_m_i

gen btu_int_asm = btu_tot_m_i / (tvs * 1000)
gen co2_int_asm = co2_tot_m_i / (tvs * 1000) 

la var co2_int_asm "CO2 Energy Intensity (kg CO2 per USD 2011), ASM" 
la var btu_int_asm "BTU Energy Intensity (million BTU per USD revenue, 2011), ASM"

keep lbdnum year co2_int_asm btu_int_asm
tempfile asm_coefs
sa `asm_coefs', replace

use "/projects/data/dataSTATA/combined/combined_iv_energy.dta", clear
merge 1:1 lbdnum year using `asm_coefs', assert(1 2 3) keep(1 3) nogen
gen co2_int_i_both = co2_int_i
replace co2_int_i_both = co2_int_asm if !mi(co2_int_asm)
gen lco2_int_i_both = log(co2_int_i_both)
la var co2_int_i_both "CO2 Intensity Including ASM Fuel Trailers, incl. interpolated years"
la var lco2_int_i_both "log CO2 Intensity Including ASM Fuel Trailers, incl. interpolated years"

gen btu_int_i_both = btu_int_i
replace btu_int_i_both = btu_int_asm if !mi(co2_int_asm)
gen lbtu_int_i_both = log(btu_int_i_both)
la var btu_int_i_both "CO2 Intensity Including ASM Fuel Trailers, incl. interpolated years"
la var lbtu_int_i_both "log CO2 Intensity Including ASM Fuel Trailers, incl. interpolated years"

gen co2_int_both = co2_int
replace co2_int_both = co2_int_asm if !mi(co2_int_asm)
gen lco2_int_both = log(co2_int_both)
la var co2_int_both "CO2 Intensity Including ASM Fuel Trailers"
la var lco2_int_both "log CO2 Intensity Including ASM Fuel Trailers"

gen btu_int_both = btu_int
replace btu_int_both = btu_int_asm if !mi(btu_int_asm)
gen lbtu_int_both = log(btu_int_both)
la var btu_int_both "CO2 Intensity Including ASM Fuel Trailers"
la var lbtu_int_both "log CO2 Intensity Including ASM Fuel Trailers"

* trim obvious outliers from imputation
sum co2_int_i_both, detail
drop if co2_int_i_both > `r(p99)'
sum btu_int_i_both, detail
drop if btu_int_i_both > `r(p99)'

sa "/projects/data/dataSTATA/combined/combined_iv_energy.dta", replace

*>------------------------- merge prices from EIA for robustness --------------<

import delimited "/projects/dstafftransfer/transfer.20200428/naics5811.csv", clear 
gen pien_n = pien if year == 2011
bys naics: egen pien_2011 = mean(pien_n)
gen pien_dfltr = pien_2011 / pien
drop pien_n pien_2011 
	
collapse (mean) pien_dfltr, by(year)
ren pien_dfltr pien_avg
tempfile energy_deflators
sa `energy_deflators', replace


import excel "/projects/dstafftransfer/transfer.20210305/avgprice_annual.xlsx", sheet("Price") cellrange(A2:I4440) firstrow clear

keep if IndustrySectorCategory == "Total Electric Industry"

keep Year State Industrial
ren Year year
ren State postalst
ren Industrial eia_price

* deflate
merge m:1 year using `energy_deflators', assert(1 2 3) keep(1 3) nogen
replace eia_price = eia_price * pien_avg

tempfile avgprices_ext
sa `avgprices_ext', replace

ren year firstyear
ren eia_price eia_price_init
tempfile avgprices_init
sa `avgprices_init', replace

* merge in current prices
use "/projects/data/dataSTATA/combined/combined_iv_energy.dta", clear
merge m:1 year postalst using `avgprices_ext', assert(1 2 3) keep(1 3) nogen
replace eia_price = eia_price / 100 
* exclude missing values for eia_price
gen leia_price = log(eia_price)
la var eia_price "Average State Electricity Price from EIA ($ per kWh, 2011)"
la var leia_price "log of eia_price"

* merge in initial prices
merge m:1 firstyear postalst using `avgprices_init', assert(1 2 3) keep(1 3) nogen
replace eia_price_init = eia_price_init / 100
gen leia_price_init = log(eia_price_init)
la var eia_price_init "Average Initial State Electricity Price from EIA ($ per kWh, 2011)"
la var leia_price_init "log of eia_price"

sa "/projects/data/dataSTATA/combined/combined_iv_energy.dta", replace
